COVID-19 Analysis, Visualization & Comparisons
#Install libraries
#!pip install --upgrade pip
#!pip install calmap
#!pip install us
#!pip install pycountry_convert
#!pip install plotly==3.8.1
#!pip install cufflinks
!pip install psutil
!pip install chart-studio==1.0.0
#Load libraries
import json
from datetime import timedelta
from urllib.request import urlopen
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers
pio.renderers.default = "svg"
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected = True)
import warnings
warnings.filterwarnings('ignore')
# Color palette (hex strings) shared by the charts below.
cnf = '#393e46'  # presumably "confirmed"; not referenced elsewhere in this view
dth = '#ff2e63'  # used for the death charts
rec = '#21bf73'  # used for the recovered charts
act = '#fe9801'  # used for the active series
# Full dataset; the bare .head() expressions preview each frame when run in a notebook.
full_table = pd.read_csv('../data/covid_19_clean_complete.csv')
full_table.head()

# Figures grouped by day and country, with 'Date' parsed into datetimes.
full_grouped = pd.read_csv('../data/full_grouped.csv')
full_grouped = full_grouped.assign(Date=pd.to_datetime(full_grouped['Date']))
full_grouped.head()

# Day-wise figures, with 'Date' parsed into datetimes.
day_wise = pd.read_csv('../data/datasets_494766_1376807_day_wise.csv')
day_wise = day_wise.assign(Date=pd.to_datetime(day_wise['Date']))
day_wise.head()

# Latest country-wise figures; empty strings and missing values become 0.
country_wise = (
    pd.read_csv('../data/datasets_494766_1376807_country_wise_latest.csv')
    .replace('', np.nan)
    .fillna(0)
)
country_wise.head()

# Worldometer data; empty strings and missing values become 0.
worldmeter_data = (
    pd.read_csv('../data/datasets_494766_1376807_worldometer_data.csv')
    .replace('', np.nan)
    .fillna(0)
)
worldmeter_data.head()
# Latest: take the last row of day_wise and reshape it to long form
# (one row per variable) so it can feed a treemap.
temp = day_wise.tail(1)[['Date', 'Deaths', 'Recovered', 'Active']]
temp = pd.melt(temp, id_vars='Date', value_vars=['Active', 'Deaths', 'Recovered'])
temp
fig = px.treemap(
    temp,
    path=['variable'],
    values='value',
    height=225,
    color_discrete_sequence=[act, rec, dth],
)
# Show label, text and value inside each tile.
fig.update_traces(textinfo='label+text+value')
fig.show()
# Maps
def plot_map(df, col, pal):
    """Show a choropleth world map of ``df[col]``, keyed by country name.

    Args:
        df: DataFrame with a 'Country/Region' column and the column ``col``.
        col: Column used for the colour, the title and the hover data.
        pal: Continuous colour scale passed to plotly.

    Only rows where ``col`` is strictly positive are plotted.
    """
    positive_rows = df.loc[df[col] > 0]
    world_map = px.choropleth(
        positive_rows,
        locations='Country/Region',
        locationmode='country names',
        color=col,
        hover_name='Country/Region',
        hover_data=[col],
        title=col,
        color_continuous_scale=pal,
    )
    world_map.show()
# One world map per country-wise metric, all drawn with the 'matter' colour scale.
for map_column in ('Confirmed', 'Deaths', 'Deaths / 100 Cases'):
    plot_map(country_wise, map_column, 'matter')
# Over time: animated world map of confirmed cases, one frame per date
# (dates formatted as YYYY-MM-DD strings for the animation slider).
# The colour is on a log scale. log1p (log(1 + x)) is used rather than log so
# that any row with zero confirmed cases maps to 0 instead of -inf.
fig = px.choropleth(
    full_grouped,
    locations='Country/Region',
    color=np.log1p(full_grouped['Confirmed']),
    locationmode='country names',
    hover_name='Country/Region',
    animation_frame=full_grouped['Date'].dt.strftime('%Y-%m-%d'),
    title='Cases over time',
    color_continuous_scale=px.colors.sequential.matter,
)
# Hide the colour bar.
fig.update(layout_coloraxis_showscale=False)
fig.show()
# Cases over time
def plot_daywise(col, hue):
    """Show a bar chart of the ``day_wise`` column ``col`` against 'Date'.

    Args:
        col: Column of ``day_wise`` to plot; also used as the chart title.
        hue: Single colour applied to the bars.
    """
    bar_chart = px.bar(
        day_wise,
        x='Date',
        y=col,
        color_discrete_sequence=[hue],
        width=700,
    )
    # Title the chart with the column name and blank out both axis titles.
    bar_chart.update_layout(title=col, xaxis_title='', yaxis_title='')
    bar_chart.show()
def plot_daywise_line(col, hue):
    """Show a line chart of the ``day_wise`` column ``col`` against 'Date'.

    Args:
        col: Column of ``day_wise`` to plot; also used as the chart title.
        hue: Single colour applied to the line.
    """
    line_chart = px.line(
        day_wise,
        x='Date',
        y=col,
        color_discrete_sequence=[hue],
        width=700,
    )
    # Title the chart with the column name and blank out both axis titles.
    line_chart.update_layout(title=col, xaxis_title='', yaxis_title='')
    line_chart.show()
# Per-date totals of recovered, death and active counts, reshaped to long
# form ('Case' / 'Count') for an area chart.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names (['Recovered', 'Deaths', 'Active'] without the inner brackets) is
# rejected by current pandas versions.
temp = full_grouped.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
temp = temp.melt(
    id_vars='Date',
    value_vars=['Recovered', 'Deaths', 'Active'],
    var_name='Case',
    value_name='Count',
)
temp.head()
fig = px.area(
    temp,
    x='Date',
    y='Count',
    color='Case',
    height=600,
    width=700,
    title='Cases over time',
    color_discrete_sequence=[rec, dth, act],
)
# Add a range slider under the x axis.
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
# Bar charts of the daily figures, each paired with its bar colour.
for bar_column, bar_colour in (
    ('Confirmed', '#333333'),
    ('Active', '#333333'),
    ('New cases', '#333333'),
    ('Deaths', dth),
    ('New deaths', dth),
    ('Recovered', rec),
    ('New recovered', rec),
):
    plot_daywise(bar_column, bar_colour)

# Line charts of the ratio columns.
for line_column, line_colour in (
    ('Deaths / 100 Cases', dth),
    ('Deaths / 100 Recovered', dth),
    ('Recovered / 100 Cases', rec),
):
    plot_daywise_line(line_column, line_colour)

plot_daywise('No. of countries', '#035aa6')
# Recovered vs active over time: reshape to long form so that each variable
# becomes its own coloured line.
temp = day_wise.loc[:, ['Date', 'Recovered', 'Active']]
temp = pd.melt(
    temp,
    id_vars='Date',
    value_vars=['Recovered', 'Active'],
    var_name='Variable',
    value_name='Count',
)
# Bare expression: the figure is displayed as the cell result in a notebook.
px.line(temp, x='Date', y='Count', color='Variable')
# Top countries (horizontal bar charts)
def plot_hbar(df, col, n, hover_data=None):
    """Show a horizontal bar chart of the ``n`` largest values of ``df[col]``.

    Args:
        df: DataFrame with 'Country/Region', 'WHO Region' and ``col`` columns.
        col: Column to rank by; also used as bar length, bar text and title.
        n: Number of rows to show (the last ``n`` after an ascending sort).
        hover_data: Extra columns to show on hover. Defaults to none.
            ``None`` replaces the former mutable ``[]`` default so that no
            list object is shared between calls.
    """
    if hover_data is None:
        hover_data = []
    fig = px.bar(
        df.sort_values(col).tail(n),
        x=col,
        y='Country/Region',
        color='WHO Region',
        text=col,
        orientation='h',
        width=700,
        hover_data=hover_data,
        color_discrete_sequence=px.colors.qualitative.Dark2,
    )
    fig.update_layout(
        title=col,
        xaxis_title='',
        yaxis_title='',
        # Order the bars by their total value, regardless of region colour.
        yaxis_categoryorder='total ascending',
        # Hide bar text that would need a font smaller than 8.
        uniformtext_minsize=8,
        uniformtext_mode='hide',
    )
    fig.show()
def plot_hbar_wm(col, n, min_pop=1000000, sort='descending'):
    """Show a horizontal bar chart of ``col`` for ``n`` rows of ``worldmeter_data``.

    Args:
        col: Column of ``worldmeter_data`` to rank and plot; also the bar text.
        n: Number of rows to show.
        min_pop: Only rows whose 'Population' is strictly greater than this
            value are considered.
        sort: 'descending' (default) shows the ``n`` largest values of
            ``col``; 'ascending' shows the ``n`` smallest. Any other value is
            treated as 'descending', which is what every value did before
            this parameter was honoured.
    """
    df = worldmeter_data[worldmeter_data['Population'] > min_pop]
    ranked = df.sort_values(col, ascending=True)
    df = ranked.head(n) if sort == 'ascending' else ranked.tail(n)

    # Build the threshold label from min_pop so the title matches the filter
    # actually applied (the default 1000000 still renders as '1M').
    if min_pop > 0 and min_pop % 1000000 == 0:
        pop_label = str(int(min_pop // 1000000)) + 'M'
    elif min_pop > 0 and min_pop % 1000 == 0:
        pop_label = str(int(min_pop // 1000)) + 'K'
    else:
        pop_label = str(min_pop)

    fig = px.bar(
        df,
        x=col,
        y='Country/Region',
        color='WHO Region',
        text=col,
        orientation='h',
        width=700,
        color_discrete_sequence=px.colors.qualitative.Dark2,
    )
    fig.update_layout(
        title=col + ' (Only countries with > ' + pop_label + ' Pop)',
        xaxis_title='',
        yaxis_title='',
        # Order the bars by their total value, regardless of region colour.
        yaxis_categoryorder='total ascending',
        # Hide bar text that would need a font smaller than 8.
        uniformtext_minsize=8,
        uniformtext_mode='hide',
    )
    fig.show()
# Top-15 bar chart for each country-wise metric.
for ranked_column in (
    'Confirmed',
    'Active',
    'New cases',
    'Deaths',
    'New deaths',
    'Deaths / 100 Cases',
    'Recovered',
    'New recovered',
    'Recovered / 100 Cases',
    '1 week change',
    '1 week % increase',
):
    plot_hbar(country_wise, ranked_column, 15)

# Top-15 charts from the Worldometer table, each with its population threshold.
for wm_column, wm_min_pop in (
    ('Tot Cases/1M pop', 100000),
    ('Deaths/1M pop', 1000000),
    ('TotalTests', 1000000),
):
    plot_hbar_wm(wm_column, 15, wm_min_pop)
# This call relies on the function's default population threshold.
plot_hbar_wm('Tests/1M pop', 15)
# Per-country comparisons over time
def plot_stacked(col):
    """Show a bar chart of ``full_grouped[col]`` by 'Date', coloured by country.

    Args:
        col: Column of ``full_grouped`` to plot; also used as the chart title.
    """
    stacked = px.bar(
        full_grouped,
        x='Date',
        y=col,
        color='Country/Region',
        title=col,
        height=600,
        color_discrete_sequence=px.colors.cyclical.mygbm,
    )
    stacked.update_layout(showlegend=True)
    stacked.show()
def plot_line(col):
    """Show a line chart of ``full_grouped[col]`` by 'Date', one line per country.

    Args:
        col: Column of ``full_grouped`` to plot; also used as the chart title.
    """
    lines = px.line(
        full_grouped,
        x='Date',
        y=col,
        color='Country/Region',
        title=col,
        height=600,
        color_discrete_sequence=px.colors.cyclical.mygbm,
    )
    lines.update_layout(showlegend=True)
    lines.show()
# The same four metrics, first as bars coloured by country, then as lines.
for stacked_column in ('Confirmed', 'Deaths', 'New cases', 'Active'):
    plot_stacked(stacked_column)
for line_metric in ('Confirmed', 'Deaths', 'New cases', 'Active'):
    plot_line(line_metric)
# Share of each date's total contributed by every country.
# After the merge on 'Date', the '_x' columns hold the per-country figures
# from full_grouped and the '_y' columns hold the day_wise figures for the
# same date.
temp = pd.merge(
    full_grouped[['Date', 'Country/Region', 'Confirmed', 'Deaths']],
    day_wise[['Date', 'Confirmed', 'Deaths']],
    on='Date',
)
# Convert to percent first and round last (one decimal place); rounding
# before multiplying by 100 reintroduces floating-point noise.
temp['% Confirmed'] = (temp['Confirmed_x'] / temp['Confirmed_y'] * 100).round(1)
temp['% Deaths'] = (temp['Deaths_x'] / temp['Deaths_y'] * 100).round(1)
temp.head()
fig = px.bar(
    temp,
    x='Date',
    y='% Confirmed',
    color='Country/Region',
    range_y=(0, 100),
    title='% of Cases from each country',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.show()
# The deaths chart previously reused the cases title.
fig = px.bar(
    temp,
    x='Date',
    y='% Deaths',
    color='Country/Region',
    range_y=(0, 100),
    title='% of Deaths from each country',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.show()
def gt_n(n):
    """Plot confirmed cases against days since each country first exceeded ``n``.

    Countries are those with any ``full_grouped`` row where 'Confirmed' > n.
    Their ``full_table`` rows are summed per country and date, rows at or
    below ``n`` are dropped, and each remaining row gets 'N days': the number
    of days since that country's earliest remaining date.

    Args:
        n: Confirmed-case threshold.
    """
    over_threshold = full_grouped['Confirmed'] > n
    countries = full_grouped.loc[over_threshold, 'Country/Region'].unique()

    temp = full_table[full_table['Country/Region'].isin(countries)]
    temp = temp.groupby(['Country/Region', 'Date'])['Confirmed'].sum().reset_index()
    temp = temp[temp['Confirmed'] > n].copy()

    # Parse dates before taking the minimum, so the comparison is
    # chronological rather than a string comparison, then order each
    # country's rows in time.
    temp['Date'] = pd.to_datetime(temp['Date'])
    temp = temp.sort_values(['Country/Region', 'Date'])
    temp['Min Date'] = temp.groupby('Country/Region')['Date'].transform('min')
    temp['N days'] = (temp['Date'] - temp['Min Date']).dt.days

    fig = px.line(
        temp,
        x='N days',
        y='Confirmed',
        color='Country/Region',
        # A space after 'from' keeps the number from running into the word.
        title='N days from ' + str(n) + ' case',
        height=600,
    )
    fig.show()
# Growth curves for countries that exceeded 100000 confirmed cases.
gt_n(100000)

# Deaths vs confirmed for the 20 rows of country_wise with the most deaths,
# drawn on log-log axes with the country name as the marker label.
fig = px.scatter(
    country_wise.sort_values('Deaths', ascending=False).head(20),
    x='Confirmed',
    y='Deaths',
    color='Country/Region',
    size='Confirmed',
    text='Country/Region',
    height=700,
    log_x=True,
    log_y=True,
    title='Deaths vs Confirmed (Scale is in log10)',
)
fig.update_traces(textposition='top center')
# Hide the legend and add a range slider.
fig.update_layout(showlegend=False, xaxis_rangeslider_visible=True)
fig.show()
# Composition of cases
def plot_treemap(col):
    """Show a treemap of ``country_wise`` with one tile per country sized by ``col``.

    Args:
        col: Column of ``country_wise`` giving the tile sizes; also the title.
    """
    tiles = px.treemap(
        country_wise,
        path=['Country/Region'],
        values=col,
        title=col,
        height=700,
        color_discrete_sequence=px.colors.qualitative.Dark2,
    )
    # Show label, text and value inside each tile.
    tiles.update_traces(textinfo='label+text+value')
    tiles.show()
# One treemap per metric.
for treemap_column in ('Confirmed', 'Deaths'):
    plot_treemap(treemap_column)
def plot_bubble(col, pal):
    """Show a date-by-country bubble chart of ``full_grouped[col]``.

    Args:
        col: Column of ``full_grouped`` driving both bubble size and colour.
        pal: Continuous colour scale passed to plotly.

    Only rows where ``col`` is strictly positive are plotted.
    """
    positive_rows = full_grouped.loc[full_grouped[col] > 0]
    by_country = positive_rows.sort_values('Country/Region', ascending=False)
    bubbles = px.scatter(
        by_country,
        x='Date',
        y='Country/Region',
        size=col,
        color=col,
        color_continuous_scale=pal,
        height=3000,
    )
    # One y tick per category; hide the colour bar.
    bubbles.update_layout(yaxis=dict(dtick=1), coloraxis_showscale=False)
    bubbles.show()
# Bubble charts for new and active cases, both with the 'Viridis' scale.
for bubble_column in ('New cases', 'Active'):
    plot_bubble(bubble_column, 'Viridis')
# Heatmap of whether each country has a non-zero 'New cases' value on each date.
# .copy() makes temp an independent frame, so adding a column below does not
# write to a slice of full_grouped (previously a SettingWithCopy situation
# hidden by the global warnings filter).
temp = full_grouped[['Date', 'Country/Region', 'New cases']].copy()
# 1 where 'New cases' is non-zero, 0 otherwise.
temp['New cases reported ?'] = (temp['New cases'] != 0).astype(int)
fig = go.Figure(data=go.Heatmap(
    z=temp['New cases reported ?'],
    x=temp['Date'],
    y=temp['Country/Region'],
    colorscale='Emrld',
    showlegend=False,
    text=temp['New cases reported ?'],
))
# One y tick per category, on a tall figure.
fig.update_layout(yaxis=dict(dtick=1), height=3000)
fig.show()